Load Libraries and functions

#setwd("/Volumes/HAVRANEK18/Lab_testing_flasks")  
library(ggplot2)
library (plotly)
library (lubridate)
library(tidyverse)
library(grid)
library(hms)
library(fpp2)
library(zoo)

#derivative function 
deriv <- function(x, y) {
  diffvar <- diff(y) / diff(x)
}

peaks.read <- function(filepath){
    files <-  list.files(path=filepath, recursive = T,full.names = T) 
    #lists the file names in the filepath directory
    tables <- lapply(files, read.table,header = T, stringsAsFactors = F) 
    #reads in the files
    df <- do.call("rbind",tables) #combines all the tables into one dataframe
    df2 <- df[,c(1:2,17:19)] #subsets the dataframe to include only necessary columns
    df3 <- df2[complete.cases(df2),] #subsets the dataframe to include only 
    #complete cases, eliminating rows at the end that aren't complete
    return(df3)
    } #peaks.read is a function that will read files from all sub-folders of the
    #directory named in filepath and combine them into one dataframe

h2o_slope <- function(data,window){
  H2O_Slope_Window <- numeric()
  for (i in 1:nrow(data)){    
    if(i<=window/2){                                                                   
      H2O_Slope_Window[i] = 
        as.numeric(coef(lm(data$H2O[1:(i+window/2)]~
        data$seconds[1:(i+window/2)]))[2])
    }else{
      if(i >= (nrow(data)-window/2)){
        H2O_Slope_Window[i] = 
          as.numeric(coef(lm(data$H2O[(i-window/2):nrow(data)]~
        data$seconds[(i-window/2):nrow(data)]))[2])
      }else{
        H2O_Slope_Window[i] = 
          as.numeric(coef(lm(data$H2O[(i-window/2):(i+window/2)]~
        data$seconds[(i-window/2):(i+window/2)]))[2])       
      }
    }
  }
  return(H2O_Slope_Window)
} #h2o_slope will calculate the slope of h2o vs. experiment second in a moving window  

Load Data

# folder_110418 <- "/Volumes/HAVRANEK18/110418"
# data_110418 <- peaks.read(folder_110418) 
# saveRDS(data_110418, "data_110418.RDS")
data_110418 <- readRDS("/Volumes/HAVRANEK18/Lab_testing_flasks/data/data_110418.RDS")

# I did a lot of work between 5 - 10 pm so I need to bring in the data from the fifth b/c the picarro uses GMT time 

# folder_110518 <- "/Volumes/HAVRANEK18/110518"
# data_110518 <- peaks.read(folder_110518) 
# saveRDS(data_110518, "data_110518.RDS")
data_110518 <- readRDS("/Volumes/HAVRANEK18/Lab_testing_flasks/data/data_110518.RDS")

Add in some Metadata

This chunk (1) creates a date/time column to match my timezone, so I can use my lab notes as a guide for data reduction, (2) adds in a linearity correction column for later comparison, (3) parses the data down to when I was running experiments, and (4) plots that water concentration data

using the instrument specific correction of: This is peice isn’t ready to go yet

Determined by altering push flow: \[\delta^{18}O = -0.0002*[H2O]-20.198\] \[\delta^{2}H = -0.0004*[H2O]-179.69\]

data_110418 <- data_110418 %>% mutate(
  MST = 
    paste(data_110418$DATE, data_110418$TIME) %>% 
    ymd_hms() %>% 
    with_tz(tzone=c("America/Denver"))
)

data_110518 <- data_110518 %>% mutate(
  MST = 
    paste(data_110518$DATE, data_110518$TIME) %>% 
    ymd_hms() %>% 
    with_tz(tzone=c("America/Denver"))
)

evening_110418 <- data_110518 %>% 
  filter( MST < "2018-11-04 23:00:00") %>%  
  mutate (
    lin_d18O = -0.0002*H2O-20.198,
    lin_dD = -0.0004*H2O - 179.69
  )

#Combine data from two different files 
data_110418 <- bind_rows(data_110418, evening_110418)

#Add in linearity correction metadata 
data_110418 <- data_110418 %>%  
  mutate (
    lin_d18O = 1.05*H2O-21000,
    lin_dD = 1.01*H2O - 20000
  )

all_110418 <- ggplot(data_110418, aes(MST, H2O))+
  labs(x="Time (MST)", title="November 04, 2018")+
  geom_point()+
  theme_classic()

all_110418

Plot the filling stage with grob labels

fill_110418 <- evening_110418 %>% 
  filter( MST < "2018-11-04 20:00:00") 

#These labels help me visually in my plots 
label1 <- grobTree(textGrob("Bottle 16", x=0.1,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label2 <- grobTree(textGrob("Bottle 15", x=0.35,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label3 <- grobTree(textGrob("Bottle 14", x=0.55,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label4 <- grobTree(textGrob("Bottle 2", x=0.8,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label5 <- grobTree(textGrob("Jumper", x=0.9,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

fill_concen_110418 <- ggplot(fill_110418, aes(MST, H2O))+
  labs(x="Time (MST)", title="November 04, 2018")+
  geom_point()+
  theme_classic()+
  annotation_custom(label1)+
  annotation_custom(label2)+
  annotation_custom(label3)+
  annotation_custom(label4)+
  annotation_custom(label5)

print(fill_concen_110418)

Fill flasks with water vapor, and reduce the input values

first_stand <- filter (data_110418, MST > "2018-11-04 16:45:00" & MST < "2018-11-04 16:55:00" )
df1_1 <- data.frame(
  "Time averaged" = 10,
  "Type" = "Correction",
  "valco_position" = 1,
  "mean_d18O" = mean (first_stand$Delta_18_16),
  "sd_d18O" = sd (first_stand$Delta_18_16),
  "mean_dD" = mean(first_stand$Delta_D_H),
  "sd_dD" = sd(first_stand$Delta_D_H),
  "mean_lin_d18O" = mean(first_stand$lin_d18O),
  "mean _lin_dD"= mean(first_stand$lin_dD),
  stringsAsFactors = FALSE
)

fill_16_110418 <- filter(evening_110418, MST > "2018-11-04 17:27:00" & MST < "2018-11-04 17:37:00")
df16 <- data.frame(
  "Time averaged" = 10,
  "Type" = "Fill",
  "valco_position" = 16,
  "mean_d18O" = mean (fill_16_110418$Delta_18_16),
  "sd_d18O" = sd (fill_16_110418$Delta_18_16),
  "mean_dD" = mean(fill_16_110418$Delta_D_H),
  "sd_dD" = sd(fill_16_110418$Delta_D_H),
  "mean_lin_d18O" = mean(fill_16_110418$lin_d18O),
  "mean _lin_dD"= mean(fill_16_110418$lin_dD),
  stringsAsFactors = FALSE
)

fill_15_110418 <- filter (evening_110418, MST > "2018-11-04 18:08:00" & MST < "2018-11-04 18:18:00")
df15 <- data.frame(
  "Time averaged" = 10,
  "Type" = "Fill",
  "valco_position" = 15,
  "mean_d18O" = mean (fill_15_110418$Delta_18_16),
  "sd_d18O" = sd (fill_15_110418$Delta_18_16),
  "mean_dD" = mean(fill_15_110418$Delta_D_H),
  "sd_dD" = sd(fill_15_110418$Delta_D_H),
  "mean_lin_d18O" = mean(fill_15_110418$lin_d18O),
  "mean _lin_dD"= mean(fill_15_110418$lin_dD),
  stringsAsFactors = FALSE
)

fill_14_110418 <- filter (evening_110418, MST > "2018-11-04 18:48:00" & MST < "2018-11-04 18:58:00")
df14 <- data.frame(
  "Time averaged" = 10 ,
  "Type" = "Fill",
  "valco_position" = 14,
  "mean_d18O" = mean (fill_14_110418$Delta_18_16),
  "sd_d18O" = sd (fill_14_110418$Delta_18_16),
  "mean_dD" = mean(fill_14_110418$Delta_D_H),
  "sd_dD" = sd(fill_14_110418$Delta_D_H),
  "mean_lin_d18O" = mean(fill_14_110418$lin_d18O),
  "mean _lin_dD"= mean(fill_14_110418$lin_dD),
  stringsAsFactors = FALSE
)

fill_2_110418 <- filter (evening_110418, MST > "2018-11-04 19:37:00" & MST < "2018-11-04 19:47:00")
df2 <- data.frame(
  "Time averaged" = 10,
  "Type" = "Fill",
  "valco_position" = 2,
  "mean_d18O" = mean (fill_2_110418$Delta_18_16),
  "sd_d18O" = sd (fill_2_110418$Delta_18_16),
  "mean_dD" = mean(fill_2_110418$Delta_D_H),
  "sd_dD" = sd(fill_2_110418$Delta_D_H),
  "mean_lin_d18O" = mean(fill_2_110418$lin_d18O),
  "mean _lin_dD"= mean(fill_2_110418$lin_dD),
  stringsAsFactors = FALSE
)

fill_jumper_110418 <- filter (evening_110418, MST > "2018-11-04 19:50:00" & MST < "2018-11-04 19:59:00")
df1 <- data.frame(
  "Time averaged" = 10,
  "Type" = "Correction",
  "valco_position" = 1,
  "mean_d18O" = mean (fill_jumper_110418$Delta_18_16),
  "sd_d18O" = sd (fill_jumper_110418$Delta_18_16),
  "mean_dD" = mean(fill_jumper_110418$Delta_D_H),
  "sd_dD" = sd(fill_jumper_110418$Delta_D_H),
  "mean_lin_d18O" = mean(fill_jumper_110418$lin_d18O),
  "mean _lin_dD"= mean(fill_jumper_110418$lin_dD),
  stringsAsFactors = FALSE
)

fill_110418_summary <-  bind_rows(df1_1, df16, df15, df14, df2, df1)

Pull out results data and add in time averaging

Out_110418 <- evening_110418 %>% 
  filter( MST >"2018-11-04 20:00:00" & MST < "2018-11-04 22:00:00")

Out_110418$seconds <- as.numeric(row.names(Out_110418))
 
Out_110418 <- Out_110418 %>% 
  mutate(
  m_11= rollmean(H2O, k=11, fill=NA),
  m_25=rollmean(H2O, k=25, fill=NA)) %>% 
  subset(seconds >13 & seconds<(nrow(Out_110418)-13))

Calulate the first derivative and second derivative of water concentration data, then plot the second derivative

#Initialize data frame
Out_110418_fd<-as.data.frame(1:(nrow(Out_110418)-1))

# mutate in the first derivatives, and the rolling first derivatives 
Out_110418_fd<- Out_110418_fd %>% 
  mutate(
    fd = deriv(Out_110418$seconds, Out_110418$H2O),
    fd11=deriv(Out_110418$seconds, Out_110418$m_11),
    fd25=deriv(Out_110418$seconds, Out_110418$m_25)
  )

#Force column names so I can do the second derivative 
colnames(Out_110418_fd, do.NULL = FALSE)
## [1] "1:(nrow(Out_110418) - 1)" "fd"                      
## [3] "fd11"                     "fd25"
colnames(Out_110418_fd)<-c("seconds", "fd", "fd11", "fd25")

#Initialize second derivative data frame
Out_110418_sd <- as.data.frame(1:(nrow(Out_110418_fd)-1))

#Mutate in the second derivatives 
Out_110418_sd <- Out_110418_sd %>% 
  mutate(
    sd=deriv(Out_110418_fd$seconds, Out_110418_fd$fd),
    sd11=deriv(Out_110418_fd$seconds, Out_110418_fd$fd11),
    sd25=deriv(Out_110418_fd$seconds, Out_110418_fd$fd25)
  )

#Force column names of the second derivative df 
colnames(Out_110418_sd, do.NULL = FALSE)
## [1] "1:(nrow(Out_110418_fd) - 1)" "sd"                         
## [3] "sd11"                        "sd25"
colnames(Out_110418_sd)<-c("seconds", "sd", "sd11", "sd25")

sd_110418_plot<-ggplot(Out_110418_sd)+
  geom_point(aes(seconds, sd),colour = "green")+
  geom_point(aes(seconds, sd11),colour = "blue")+
  geom_point(aes(seconds, sd25), colour = "red")+
  annotation_custom(label1)+
  annotation_custom(label2)+
  annotation_custom(label3)+
  annotation_custom(label4)+
  scale_y_continuous(limits = c(-5, 5))+
  labs(x="Seconds", title="second derivative")+
  theme_classic()

print(sd_110418_plot)
## Warning: Removed 4117 rows containing missing values (geom_point).
## Warning: Removed 475 rows containing missing values (geom_point).
## Warning: Removed 502 rows containing missing values (geom_point).

ggplotly(sd_110418_plot)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

From this chunk, determine the seconds (in the Out_110418 data frome) where the second derivative of water concentration is approximately 0. Use the first 300 seconds (5 minutes) where the system is draining at steady state. Then feed that into the data frames in the next chunk

Calculate results using times when the second derivative of water concentration ~0

#Create a data frame for each bottle 
results_110418_16 <- filter(Out_110418, seconds > 900 & seconds < 1200)
results_110418_15 <- filter (Out_110418, seconds > 2750 & seconds < 3050)
results_110418_14 <- filter (Out_110418, seconds > 4600 & seconds < 4900)
results_110418_2 <- filter (Out_110418, seconds > 6500 & seconds < 6800)
results_110418_1 <- filter (Out_110418, seconds > 7874 & seconds < 8400)

#Create a data frame for each bottle and then smoosh the dfs together
R16_df <- data.frame(
  "valco_position" = 16,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_16$Delta_18_16),
  "sd_d18O" = sd (results_110418_16$Delta_18_16),
  "mean_dD" = mean(results_110418_16$Delta_D_H),
  "sd_dD" = sd(results_110418_16$Delta_D_H),
  "mean_lin_d18O" = mean(results_110418_16$lin_d18O),
  "mean _lin_dD"= mean(results_110418_16$lin_dD),
  stringsAsFactors = FALSE
)

R15_df <- data.frame(
  "valco_position" = 15,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_15$Delta_18_16),
  "sd_d18O" = sd (results_110418_15$Delta_18_16),
  "mean_dD" = mean(results_110418_15$Delta_D_H),
  "sd_dD" = sd(results_110418_15$Delta_D_H),
  "mean_lin_d18O" = mean(results_110418_15$lin_d18O),
  "mean _lin_dD"= mean(results_110418_15$lin_dD),
  stringsAsFactors = FALSE
)

R14_df <- data.frame(
  "valco_position" = 14,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_14$Delta_18_16),
  "sd_d18O" = sd (results_110418_14$Delta_18_16),
  "mean_dD" = mean(results_110418_14$Delta_D_H),
  "sd_dD" = sd(results_110418_14$Delta_D_H),
  "mean_lin_d18O" = mean(results_110418_14$lin_d18O),
  "mean _lin_dD"= mean(results_110418_14$lin_dD),
  stringsAsFactors = FALSE
)

R2_df <- data.frame(
  "valco_position" = 2,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_2$Delta_18_16),
  "sd_d18O" = sd (results_110418_2$Delta_18_16),
  "mean_dD" = mean(results_110418_2$Delta_D_H),
  "sd_dD" = sd(results_110418_2$Delta_D_H),
  "mean_lin_d18O" = mean(results_110418_2$lin_d18O),
  "mean _lin_dD"= mean(results_110418_2$lin_dD),
  stringsAsFactors = FALSE
)

R1_df<- data.frame(
  "valco_position" = 1,
  "Time averaged" = 8.76,
  "Type" = "Correction",
  "mean_d18O" = mean (results_110418_1$Delta_18_16),
  "sd_d18O" = sd (results_110418_1$Delta_18_16),
  "mean_dD" = mean(results_110418_1 $Delta_D_H),
  "sd_dD" = sd(results_110418_1 $Delta_D_H),
  "mean_lin_d18O" = mean(results_110418_1$lin_d18O),
  "mean _lin_dD"= mean(results_110418_1$lin_dD),
  stringsAsFactors = FALSE
)

results_110418_summary <- bind_rows(R16_df, R15_df, R14_df, R2_df, R1_df)

Bind together input and results

** Here I need to change this so that it saves to the data_output folders **

Summary_110418 <- bind_rows(fill_110418_summary, results_110418_summary)
print(Summary_110418)
##    Time.averaged       Type valco_position mean_d18O   sd_d18O   mean_dD
## 1          10.00 Correction              1 -25.13749 0.2309458 -185.9443
## 2          10.00       Fill             16 -24.95446 0.2005179 -185.9307
## 3          10.00       Fill             15 -24.99653 0.2035923 -186.3609
## 4          10.00       Fill             14 -24.66604 0.2148824 -186.2368
## 5          10.00       Fill              2 -24.68222 0.2019680 -186.1628
## 6          10.00 Correction              1 -24.86418 0.2346139 -186.2695
## 7           5.00     result             16 -24.95140 0.2352566 -185.5214
## 8           5.00     result             15 -25.21145 0.2136877 -185.5911
## 9           5.00     result             14 -24.97166 0.2326514 -185.1155
## 10          5.00     result              2 -25.03506 0.2178029 -185.9696
## 11          8.76 Correction              1 -23.16637 0.4698788 -186.0463
##        sd_dD mean_lin_d18O mean._lin_dD
## 1  0.6543178    9847.69966    9672.5492
## 2  0.6231383     -25.91508    -191.1242
## 3  0.6309402     -25.71720    -190.7284
## 4  0.6341125     -25.62295    -190.5399
## 5  0.6037736     -25.59207    -190.4781
## 6  0.6054040     -25.56774    -190.4295
## 7  0.6632035     -24.93242    -189.1588
## 8  0.6457631     -24.71026    -188.7145
## 9  0.7057297     -24.59810    -188.4902
## 10 0.7176271     -24.17247    -187.6389
## 11 1.7536300     -25.72425    -190.7425
Summary_110418 <- Summary_110418 %>% mutate(
  analysis = as.numeric(row.names(Summary_110418))
)

Now that I have a summary of my raw data I can move onto corrections

Drift Correction

standards <- filter(Summary_110418, valco_position==1)
standards <- standards %>% mutate(
  diff_d18O = mean_d18O - -25.0,
  diff_dD = mean_dD - -185.0
)

lm.d18O <- lm (data = standards, formula = diff_d18O ~ analysis)
print(lm.d18O)
## 
## Call:
## lm(formula = diff_d18O ~ analysis, data = standards)
## 
## Coefficients:
## (Intercept)     analysis  
##     -0.5720       0.1971
lm.dD <- lm (data = standards, formula = diff_dD ~ analysis)
print (lm.dD)
## 
## Call:
## lm(formula = diff_dD ~ analysis, data = standards)
## 
## Coefficients:
## (Intercept)     analysis  
##     -1.0255      -0.0102

I don’t think either slope is significant enough to do a drift correction on these data.

Linearity Corrrection

using the instrument specific correction of: This is peice isn’t ready to go yet

predetermined equation: ???? JUST A DUMMY EQ TO WORK WITH FOR NOW \[\delta^{18}O = -0.0002*[H2O]-20.198\] \[\delta^{2}H = -0.0004*[H2O]-179.69\]

Temperature Correction